#!/usr/bin/env python
#coding=utf-8
import urllib2
import re
import time, datetime
import pymysql.cursors
import uuid


class Spider:
    '''
    Crawler for the Jinse Finance (jinse.com) news-flash page.

    The constructor arguments are simply stored on the instance; the
    fetching and parsing methods do not read them.
    '''
    classstr = "类成员变量"

    # Text that immediately precedes the date inside the list element's id.
    _DATE_MARKER = '<ul class="lost" id="lost-'
    # Number of characters taken after the marker as the date.
    _DATE_LENGTH = 10
    # Opening tag of one "bright" (filled) star.
    _STAR_MARKER = '<span class="star-bright">'
    # Opening tag of the first item's info block; bounds the star count.
    _INFO_MARKER = '<div class="live-info">'
    # Captures (time, content) for every news-flash entry. re.S lets the
    # lazy ".*?" run across line breaks between the time and the link.
    _ITEM_PATTERN = re.compile(
        r'<p class="live-time">(.*?)</p>.*?'
        r'href="/lives/\d*\.htm" target="_blank">(.*?)</a>',
        re.S)

    def __init__(self, date=None, level=None, item_list=None):
        '''
        Store the given values on the instance.

        All arguments are optional so that ``Spider()`` can be created
        before any page has been loaded; ``item_list`` defaults to a new
        empty list.
        '''
        self.date = date
        self.level = level
        self.item_list = [] if item_list is None else item_list

    def load_page(self, url):
        '''
        Download ``url`` and extract the news-flash data from its HTML.

        Returns a ``(date, level, item_list)`` tuple; see ``_parse_page``
        for the meaning of each element. Errors raised by ``urllib2``
        while opening or reading the URL propagate to the caller.
        '''
        user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
        # The header name must be "User-Agent" (hyphen); with an underscore
        # the browser-like user agent is not sent under the expected name.
        headers = {"User-Agent": user_agent}
        req = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(req)
        try:
            html = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

        return self._parse_page(html)

    def _parse_page(self, html):
        '''
        Extract the date, star level and entries from the page HTML.

        Returns a ``(date, level, item_list)`` tuple:

        * ``date``: the 10 characters following the date marker, or an
          empty string when the marker is not present.
        * ``level``: how many bright-star tags occur between the first
          bright star and the first info block (0 when there is no
          bright star).
        * ``item_list``: list of ``(time, content)`` tuples, one per
          matched entry, in page order.
        '''
        # Strip line breaks up front so they do not end up in the content.
        html = html.replace("<br>", "")

        item_list = self._ITEM_PATTERN.findall(html)

        marker_pos = html.find(self._DATE_MARKER)
        if marker_pos == -1:
            # Without the marker any slice would be unrelated page text.
            date = ""
        else:
            date_start = marker_pos + len(self._DATE_MARKER)
            date = html[date_start:date_start + self._DATE_LENGTH]

        first_star = html.find(self._STAR_MARKER)
        if first_star == -1:
            level = 0
        else:
            info_pos = html.find(self._INFO_MARKER)
            # With no info block, count through the end of the document.
            stop = info_pos if info_pos != -1 else len(html)
            level = html[first_star:stop].count(self._STAR_MARKER)

        return date, level, item_list

    def jinse_spider(self):
        '''
        Placeholder for the Jinse Finance news-flash crawl.

        Currently does nothing and returns None.
        '''
        #html=load_page("http://www.jinse.com/lives")
        #print html

#main
def main():
    '''
    Fetch the newest news-flash entry and insert it into MySQL if it is new.

    Loads http://www.jinse.com/lives, takes the first parsed entry and
    looks for a row in ``info_news_flash`` with the same content. When
    no such row exists, a new row is inserted with a generated id.

    Raises SystemExit when the page yields no entries.
    '''
    # load_page() does not read the constructor values, so placeholders
    # are passed explicitly.
    my_spider = Spider(None, None, [])
    date, level, item_list = my_spider.load_page("http://www.jinse.com/lives")
    if not item_list:
        raise SystemExit("No news-flash entries were parsed from the page; nothing to store.")

    # Only the first entry on the page is processed.
    items = item_list[0]
    content = items[1]
    uid = uuid.uuid1().hex  # same as str(uuid1) with the dashes removed
    datatime = date + ' ' + items[0] + ':00'  # page time has no seconds part
    level = str(level)

    # Scraped text is untrusted and may contain quotes, so values are
    # passed as query parameters instead of being formatted into the SQL.
    select_sql = "select id from info_news_flash where content=%s"
    insert_sql = ("insert into info_news_flash (id,content,create_date,level) "
                  "values(%s,%s,%s,%s)")

    #conn = pymysql.connect(host='localhost', port=3306, user='root', password='root', db='virtual_coin',charset="utf8", cursorclass=pymysql.cursors.DictCursor)
    # Company server: 47.52.170.28; my test server: 42.51.201.237
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    conn = pymysql.connect(host='42.51.201.237',
                           port=3306,
                           user='momochong0',
                           password='momochong0',
                           db='virtual_coin',
                           charset="utf8",
                           cursorclass=pymysql.cursors.DictCursor)
    try:
        # connect() only opens the connection; statements run via a cursor.
        cur = conn.cursor()
        try:
            cur.execute(select_sql, (content,))
            data = cur.fetchone()
            if data is None:
                # The decode/encode round trip to gb2312 is needed on
                # Windows consoles; Linux does not need it.
                print("查询结果为空，进行数据插入".decode("utf-8").encode("gb2312") + "\r\n")
                cur.execute(insert_sql, (uid, content, datatime, level))
                conn.commit()  # persist the inserted row
            else:
                print("查询结果已存在，不进行任何操作".decode("utf-8").encode("gb2312") + "\r\n")
        finally:
            cur.close()
    finally:
        # Always release the database connection, even after an error.
        conn.close()


if __name__ == '__main__':
    main()
